import json


def process_jsonl_file(input_path, output_path) -> None:
    """Collapse the list-valued ``found_*`` fields of a JSONL file to their first element.

    Each non-blank line of ``input_path`` is parsed as one JSON record. For
    each of the keys ``found_files``, ``found_modules`` and ``found_entities``
    the value is replaced by its first element when it is a non-empty list,
    and by ``None`` otherwise (key missing, value not a list, or empty list).
    All other keys are left untouched. The records are then written to
    ``output_path``, one JSON object per line, and a short summary is printed.

    The whole input is read before the output file is opened, so passing the
    same path for both arguments is safe.

    Args:
        input_path: Path of the JSONL file to read (UTF-8).
        output_path: Path of the JSONL file to write (UTF-8, overwritten).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    list_fields = ("found_files", "found_modules", "found_entities")
    processed_data = []

    with open(input_path, "r", encoding="utf-8") as file:
        for line in file:
            if not line.strip():  # skip blank lines
                continue
            data = json.loads(line)

            for key in list_fields:
                value = data[key] if key in data else None
                # Index 0 is the first element; the previous code used index 1,
                # which returned the second element and raised IndexError on
                # single-element lists.
                data[key] = value[0] if isinstance(value, list) and value else None

            processed_data.append(data)

    # Write the processed records, one JSON object per line.
    with open(output_path, "w", encoding="utf-8") as file:
        for data in processed_data:
            file.write(json.dumps(data, ensure_ascii=False) + "\n")

    print(f"处理完成！共处理了 {len(processed_data)} 条记录")
    print(f"输出文件：{output_path}")


# Script entry: process the hard-coded input file and write the result
# to a new file in the same directory.
input_file = "/data_ext/ref_code/LocAgent/output1/loc_outputs.jsonl"
output_file = "/data_ext/ref_code/LocAgent/output1/loc_outputs_processed2.jsonl"

process_jsonl_file(input_path=input_file, output_path=output_file)
